﻿import requests
import pandas as pd
# Inspecting the page shows that a single GET request returns the complete
# Double Color Ball draw history starting from the issue given in `start`.
HISTORY_URL = 'https://datachart.500.com/ssq/history/newinc/history.php?start=03001'
# Seconds to wait for the server; without a timeout requests may block forever.
REQUEST_TIMEOUT = 30

# Markers used to cut the draw table and its rows out of the raw HTML.
_TABLE_OPEN = '<tbody id="tdata">'
_TABLE_CLOSE = '</tbody>'
_ROW_OPEN = '<tr class="t_tr1">'
# Commented-out cell embedded in each row; it contains a '</td>' and would
# otherwise shift every cell index by one.
_COMMENTED_CELL = '<!--<td>2</td>-->'
# Cells 1..7 of a row hold the drawn balls (cell 0 precedes them and is skipped).
_FIRST_BALL_CELL = 1
_LAST_BALL_CELL = 7


def fetch_history_html(url=HISTORY_URL, timeout=REQUEST_TIMEOUT):
    """Download the history page and return its body decoded as UTF-8.

    Args:
        url: Address of the history page.
        timeout: Seconds to wait before giving up on the request.

    Returns:
        The response body as text.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-success HTTP status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here rather than trying to parse an error page further down.
    response.raise_for_status()
    response.encoding = 'utf-8'
    return response.text


def _cell_text(cell):
    """Return the text that follows the opening tag of one table cell."""
    pieces = cell.split('>')
    if len(pieces) < 2:
        raise ValueError('malformed table cell: %r' % cell)
    return pieces[1].replace('&nbsp;', '')


def parse_history(page_html):
    """Extract the ball numbers of every draw from the history page HTML.

    Args:
        page_html: Full HTML text of the history page.

    Returns:
        A list with one entry per table row, each entry being a list of the
        seven ball strings taken from cells 1 through 7 of that row.

    Raises:
        ValueError: If the draw table cannot be located or a row has fewer
            cells than expected.
    """
    _, table_found, after_open = page_html.partition(_TABLE_OPEN)
    if not table_found:
        raise ValueError('draw table %r not found in page' % _TABLE_OPEN)
    table_body = after_open.split(_TABLE_CLOSE)[0]

    draws = []
    # The text before the first row marker is not a row, hence the [1:].
    for row_html in table_body.split(_ROW_OPEN)[1:]:
        cells = row_html.replace(_COMMENTED_CELL, '').split('</td>')
        if len(cells) <= _LAST_BALL_CELL:
            raise ValueError(
                'expected at least %d cells in a draw row, got %d'
                % (_LAST_BALL_CELL + 1, len(cells))
            )
        draws.append(
            [_cell_text(cell)
             for cell in cells[_FIRST_BALL_CELL:_LAST_BALL_CELL + 1]]
        )
    return draws


def main():
    """Fetch the draw history, save it to an Excel workbook, and wait for Enter."""
    page_html = fetch_history_html()
    # Single column whose cells each hold the list of balls for one draw.
    history_data = {'历史数据': parse_history(page_html)}
    df = pd.DataFrame(history_data)
    df.to_excel(r'双色球历史数据.xlsx', index=False)
    print("双色球历史数据爬取成功！！！\n按任意键结束......")
    # Keep the console window open until the user presses Enter.
    input()


if __name__ == '__main__':
    main()